'''


exec(''.join(open("/home/dalton_m/ppp/pyfiles/CRppp_cntynaicsv1.py", encoding="utf8").readlines()[:]))
nohup python3 /home/dalton_m/ppp/pyfiles/CRppp_cntynaicsv1.py  | tee &
'''


import os

import pandas as pd

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import sys

sys.path.append('/home/dalton_m/payload')
from basicfunctions import *

filename = 'CRppp_cntynaicsv1'

resultsloc1 = "/home/dalton_m/ppp/"

from datetime import date
datestr = date.today().strftime(format="%Y%m%d")
resultsloc = resultsloc1 + datestr + '/'
if not os.path.exists(resultsloc):
    os.makedirs(resultsloc)


logging.basicConfig(filename=resultsloc + filename + '.txt', level=logging.ERROR,
                    format='%(asctime)s - %(levelname)s - %(message)s')
logging.debug('This message should go to the log file')
logging.info('So should this')
logging.warning('And this, too')
logging.exception('And this, too')
logging.captureWarnings(True)

sys.path.append(resultsloc1 + 'pyfiles/')

#the quarter data is available
q=8
moncount = q*3 + 1

'''
************************************************************************************************
************************************************************************************************
************************************************************************************************
- employment
1) get post-2019 LDBs from employment file
2) get 2019 qcew data and keep ldbs from 1)
3) merge 2019 zip code
4) get monthly employment by zip code and naics 4 digit
5) load 1) again, load 2020, then 2021 zip code data and merge
6) post-2019 employment by zip- by month
- wages
-- same thing

************************************************************************************************
************************************************************************************************
************************************************************************************************
'''
'''
************************************************************************************************
************************************************************************************************
************************************************************************************************
- employment
1) get post-2019 LDBs from employment file
************************************************************************************************
************************************************************************************************
************************************************************************************************
'''
df1 = cudf.read_csv(dataloc + 'pppfiles/' + 'ANppp_ldbv1' + '_emp.csv')
ldblist = df1['ldb_num'].to_pandas().unique().tolist()

'''
************************************************************************************************
************************************************************************************************
************************************************************************************************
- employment
2) get 2019 qcew data and keep ldbs from 1)
3) merge 2019 zip code
4) get monthly employment by zip code and naics 4 digit

************************************************************************************************
************************************************************************************************
************************************************************************************************
'''
######get prev year seasonal emp data
yrm1 = 2019
fips = ''
qtr = ''
own = 5
# 'ui_acct', 'rpt_unit',
cols = ['aaemp', 'meei', 'naics_code']
dfldb = qcew(yrm1, qtr, fips, own, cols).rename(columns={'ui_acct': 'ui_acct',
                                                         'rpt_unit': 'run_num',
                                                         'naics_code': 'naics_ldb'})
dfldb = dfldb[(dfldb['meei'].isin([1, 3, 4, 5, 6])) & (dfldb['ldb_num'].isin(ldblist))].drop(columns='meei')
##geography
dfg = cudf.read_csv(dataloc + 'ldb/address/' + 'ldbaddressesv1_2019.psv', sep='|')[['ldb_num','cnty', 'fipsstate']].dropna().drop_duplicates(subset='ldb_num', keep = 'first')
dfg['fipscnty'] = dfg['fipsstate']*1000 + dfg['cnty']
dfg = cudf.concat([dfg[['ldb_num', 'fipscnty']], cudf.read_csv(dataloc + 'ldb/newgeog' + '20' + '.psv', sep='|')[['ldb_num', 'cnty']].dropna().rename(columns = {'cnty' : 'fipscnty'})]).drop_duplicates(subset='ldb_num', keep = 'first')

#merge
kcols = ['ldb_num', 'emp_m1', 'emp_m2', 'emp_m3','emp_m4', 'emp_m5', 'emp_m6',
 'emp_m7', 'emp_m8', 'emp_m9', 'emp_m10', 'emp_m11', 'emp_m12',
       'naics_ldb', ]
dfldb = dfldb[kcols].merge(dfg, on = ['ldb_num'], how = 'inner')

##naics4
dfldb['naics4'] = dfldb['naics_ldb'].to_pandas().astype('str').str[:4]
dfzip19 = dfldb.groupby(['fipscnty', 'naics4'])[['emp_m'+str(i) for i in range(1,13)]].sum().reset_index()
dfldb = None

'''
************************************************************************************************
************************************************************************************************
************************************************************************************************
- employment
5) load 1) again, load 2020, then 2021 zip code data and merge

************************************************************************************************
************************************************************************************************
************************************************************************************************
'''
########################
####### 2020
########################
##geography for 2020



##2020 data
yrm1 = 2020
fips = ''
qtr = ''
own = 5
# 'ui_acct', 'rpt_unit',
cols = [ 'meei', 'naics_code']
dfldb = qcew(yrm1, qtr, fips, own, cols).rename(columns={'ui_acct': 'ui_acct',
                                                         'rpt_unit': 'run_num',
                                                         'naics_code': 'naics_ldb'})
dfldb = dfldb[(dfldb['meei'].isin([1, 3, 4, 5, 6])) & (dfldb['ldb_num'].isin(ldblist))].drop(columns='meei')
dfldb['naics4'] = dfldb['naics_ldb'].to_pandas().astype('str').str[:4]
dfldb = dfldb[['ldb_num', 'naics4', ]].merge(dfg, on = ['ldb_num'], how = 'inner')
dfldb['fipscnty'] = pd.to_numeric(dfldb['fipscnty'].to_pandas(), errors='coerce')
dfldb = dfldb.dropna()
dfg = None
kdict = {}
for i in range(1,13):
    cond = (df1['num_month'] == i)
    kdict[i] = df1[cond].merge(dfldb, on = ['ldb_num'], how = 'inner').dropna().groupby(['fipscnty', 'naics4'])['emp_m'].sum().reset_index()
dfldb = None
########################
####### 2021
########################
##2021 data - need naics
#bring together each quarter of LDB extract for '21
dft1 = cudf.read_csv(dataloc + 'pppfiles/' + 'ANppp_ldbv1' + '_step1.csv')[['ldb_num', 'fipscnty', 'naics']]

dft1['naics4'] = dft1['naics'].to_pandas().astype('str').str[:4]

dft1 = dft1[['ldb_num', 'fipscnty', 'naics4']]
for i in range(13,moncount):
    cond = (df1['num_month'] == i)
    kdict[i] = df1[cond].merge(dft1, on = ['ldb_num'], how = 'inner').dropna().groupby(['fipscnty', 'naics4'])['emp_m'].sum().reset_index()
dft1 = None
'''
************************************************************************************************
************************************************************************************************
************************************************************************************************
6) post-2019 employment by zip-naics4 by month

************************************************************************************************
************************************************************************************************
************************************************************************************************
'''
dfzip19['fipscnty'] = pd.to_numeric(dfzip19['fipscnty'].to_pandas(), errors='coerce')
mc = ['fipscnty', 'naics4']
for i in range(1,13):
    j = i % 12
    if j==0:
        j = 12
    kcols = mc + ['emp_m'+ str(j)]
    renamedict = {'emp_m'+ str(j) : 'emp_19'}
    kdict[i].merge(dfzip19[kcols].rename(columns = renamedict), on = mc, how = 'outer').to_csv(dataloc + 'pppfiles/' + filename + '_' + str(i) + '.csv')


for i in range(13,moncount):
    j = i % 12
    if j==0:
        j = 12
    kcols = mc + ['emp_m'+ str(j)]
    renamedict = {'emp_m'+ str(j) : 'emp_19'}
    dft = kdict[i].copy()
    dft['fipscnty'] = pd.to_numeric(dft['fipscnty'].to_pandas(), errors='coerce')
    dft.merge(dfzip19[kcols].rename(columns = renamedict), on = mc, how = 'outer').to_csv(dataloc + 'pppfiles/' + filename + '_' + str(i) + '.csv')

'''
************************************************************************************************
************************************************************************************************
************************************************************************************************
- wages
1) get post-2019 LDBs from employment file
************************************************************************************************
************************************************************************************************
************************************************************************************************
'''
df1 = pd.read_csv(dataloc + 'pppfiles/' + 'ANppp_ldbv1' + '_wage.csv')
ldblist = df1['ldb_num'].unique().tolist()

'''
************************************************************************************************
************************************************************************************************
************************************************************************************************
- employment
2) get 2019 qcew data and keep ldbs from 1)
3) merge 2019 zip code
4) get monthly employment by zip code and naics 4 digit

************************************************************************************************
************************************************************************************************
************************************************************************************************
'''
######get prev year seasonal emp data
yrm1 = 2019
fips = ''
qtr = ''
own = 5
# 'ui_acct', 'rpt_unit',
cols = ['wages', 'meei', 'naics_code']
dfldb = qcew(yrm1, qtr, fips, own, cols).rename(columns={'ui_acct': 'ui_acct',
                                                         'rpt_unit': 'run_num',
                                                         'naics_code': 'naics_ldb'})
dfldb = dfldb[(dfldb['meei'].isin([1, 3, 4, 5, 6])) & (dfldb['ldb_num'].isin(ldblist))].drop(columns='meei')
##geography
dfg = cudf.read_csv(dataloc + 'ldb/address/' + 'ldbaddressesv1_2019.psv', sep='|')[['ldb_num','cnty', 'fipsstate']].dropna().drop_duplicates(subset='ldb_num', keep = 'first')
dfg['fipscnty'] = dfg['fipsstate']*1000 + dfg['cnty']
dfg = cudf.concat([dfg[['ldb_num', 'fipscnty']], cudf.read_csv(dataloc + 'ldb/newgeog' + '20' + '.psv', sep='|')[['ldb_num', 'cnty']].dropna().rename(columns = {'cnty' : 'fipscnty'})]).drop_duplicates(subset='ldb_num', keep = 'first')

#merge
kcols = ['ldb_num', 'wages_1','wages_2','wages_3','wages_4',
       'naics_ldb', ]
dfldb = dfldb[kcols].merge(dfg, on = ['ldb_num'], how = 'inner')

##naics4
dfldb['naics4'] = dfldb['naics_ldb'].to_pandas().astype('str').str[:4]
dfzip19 = dfldb.groupby(['fipscnty', 'naics4'])[['wages_'+str(i) for i in range(1,5)]].sum().reset_index()
dfldb = None

'''
************************************************************************************************
************************************************************************************************
************************************************************************************************
- wages
5) load 1) again, load 2020, then 2021 zip code data and merge

************************************************************************************************
************************************************************************************************
************************************************************************************************
'''
########################
####### 2020
########################
##geography for 2020

##2020 data
yrm1 = 2020
fips = ''
qtr = ''
own = 5
# 'ui_acct', 'rpt_unit',
cols = [ 'meei', 'naics_code']
dfldb = qcew(yrm1, qtr, fips, own, cols).rename(columns={'ui_acct': 'ui_acct',
                                                         'rpt_unit': 'run_num',
                                                         'naics_code': 'naics_ldb'})
dfldb = dfldb[(dfldb['meei'].isin([1, 3, 4, 5, 6])) & (dfldb['ldb_num'].isin(ldblist))].drop(columns='meei')
dfldb['naics4'] = dfldb['naics_ldb'].to_pandas().astype('str').str[:4]
dfldb = dfldb[['ldb_num', 'naics4', ]].merge(dfg, on = ['ldb_num'], how = 'inner')
dfldb['fipscnty'] = pd.to_numeric(dfldb['fipscnty'].to_pandas(), errors='coerce')
dfldb = dfldb.dropna()
dfg = None
kdict = {}
for i in range(1,13):
    cond = (df1['num_month'] == i)
    kdict[i] = df1[cond].merge(dfldb.to_pandas(), on = ['ldb_num'], how = 'inner').dropna().groupby(['fipscnty', 'naics4'])['wage_m'].sum().reset_index()
dfldb = None
########################
####### 2021
########################
##2021 data - need naics
#bring together each quarter of LDB extract for '21
dft1 = cudf.read_csv(dataloc + 'pppfiles/' + 'ANppp_ldbv1' + '_step1.csv')[['ldb_num', 'fipscnty', 'naics']]

dft1['naics4'] = dft1['naics'].to_pandas().astype('str').str[:4]
dft1 = dft1[['ldb_num', 'fipscnty', 'naics4']]
for i in range(13,moncount):
    cond = (df1['num_month'] == i)
    kdict[i] = df1[cond].merge(dft1.to_pandas(), on = ['ldb_num'], how = 'inner').dropna().groupby(['fipscnty', 'naics4'])['wage_m'].sum().reset_index()
dft1 = None
'''
************************************************************************************************
************************************************************************************************
************************************************************************************************
6) post-2019 employment by zip-naics4 by month

************************************************************************************************
************************************************************************************************
************************************************************************************************
'''
dfzip19['fipscnty'] = pd.to_numeric(dfzip19['fipscnty'].to_pandas(), errors='coerce')
mc = ['fipscnty', 'naics4']
for i in range(1,13):
    j = ((i-1) // 3) + 1
    if j > 4:
        j = j - 4
    kcols = mc + ['wages_'+ str(j)]
    renamedict = {'wages_'+ str(j) : 'wages_19'}
    kdict[i].merge(dfzip19[kcols].to_pandas().rename(columns = renamedict), on = mc, how = 'outer').to_csv(dataloc + 'pppfiles/' + filename + '_wage' + str(i) + '.csv')


for i in range(13,moncount):
    j = ((i-1) // 3) + 1
    if j > 4:
        j = j - 4
    kcols = mc + ['wages_'+ str(j)]
    renamedict = {'wages_'+ str(j) : 'wages_19'}
    dft = kdict[i].copy()
    dft['fipscnty'] = pd.to_numeric(dft['fipscnty'], errors='coerce')
    dft.merge(dfzip19[kcols].to_pandas().rename(columns = renamedict), on = mc, how = 'outer').to_csv(dataloc + 'pppfiles/' + filename + '_wage' + str(i) + '.csv')

